In this project, we seek to explore the relationships between income levels and health, as income is frequently brought up as one of the social determinants of health. We will explore this relationship at multiple levels: Internationally, nationally, and locally.
Two main indicators of health are health outcomes, such as life expectancy, and, in the United States, health insurance coverage. We will explore how these measures vary over time and across geography.
library(ggplot2)
library(plotly)
library(gapminder)
library(tidyr)
library(dplyr)
library(gganimate)
library(ggthemes)
At the international level, we looked at life expectancy as a function of GDP per capita. From 1952 to 2007, life expectancy has increased overall, and we can clearly see that in most countries, as the national wealth increases, the life expectancy also increases.
p <- ggplot(gapminder, aes(x = gdpPercap, y=lifeExp, size = pop, colour = country)) +
geom_point(show.legend = FALSE, alpha = 0.7) +
scale_color_viridis_d() +
scale_size(range = c(2, 12)) +
scale_x_log10() +
theme_economist() +
labs(x="Weath (GDP per capita)",
y="Life expectancy",
title = "Life Expectancy vs. Income in the world",
caption = "Source: IHME")
p + transition_time(year) +
labs(title = "Life expectancy vs. GDP by country: {frame_time}")+
view_follow(fixed_y = TRUE)
This pattern continues at the national level in the US: The richest American men live 15 years longer than the poorest men, while the richest American women live 10 years longer than the poorest women.
library(RColorBrewer)
df1 <- read.csv("Data/health_ineq_online_table_1.csv")
p1 <- ggplot(df1, aes(pctile, le_raceadj),alpha = 1)+
geom_line(aes(color=gnd))+
theme_economist() +
labs(x="\nHousehold income percentile",
y="Life expectancy at age 40\n",
title = "Life expectancy vs. income in the US\n",
caption = "Source: Health Inequality Project") +
scale_color_brewer(palette = "Dark2") +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5),
legend.position = "top", legend.title = element_blank())
p1
df3 <- read.csv("Data/health_ineq_online_table_3_female.csv")
df3_2001 <-gather(df3,incomepetile,life,Q1:Q4,factor_key = TRUE)
name <- c("California", "Texas", "Florida", "New York", "Pennsylvania", "Illinois", "Ohio", "Georgia")
df_sub <- df3_2001%>%
filter(statename %in% name)
p3 <- ggplot(data = df_sub, aes(y = life, x = incomepetile,color = statename, group = statename))+
geom_line(size = 1)+
theme_bw()+
theme_economist() +
scale_color_brewer(palette = "Dark2") +
labs(x="\nHousehold income quartile",
y="Life expectancy at age 40\n",
title = "Female life expectancy vs. income\n",
caption = "Source: Health Inequality Project") +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5))+
theme(legend.position = "right", legend.title = element_blank())
p3
df4 <- read.csv("Data/health_ineq_online_table_3_male.csv")
df4_2001 <-gather(df4,incomepetile,life,Q1:Q4,factor_key = TRUE)
df_sub_2 <- df4_2001%>%
filter(statename %in% name)
p4 <- ggplot(data = df_sub_2, aes(y = life, x = incomepetile,color = statename, group = statename))+
geom_line(size = 1)+
theme_bw()+
theme_economist() +
scale_color_brewer(palette = "Dark2") +
labs(x="\nHousehold income quartile",
y="Life expectancy at age 40\n",
title = "Male life expectancy vs. income\n",
caption = "Source: Health Inequality Project") +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5))+
theme(legend.position = "right", legend.title = element_blank())
p4
library(ggplot2)
library(ggthemes)
library(tmap)
library(dplyr)
library(tidyverse)
library(leaflet)
library(RColorBrewer)
library(rgeos)
library(lubridate)
library(plotly)
library(rgdal)
library(leafsync)
library(tigris)
library(acs)
library(stringr)
library(sp)
library(shiny)
library(shinythemes)
library(maptools)
library(gganimate)
library(magick)
library(gapminder)
lower48 <- counties(setdiff(state.name, c("Hawaii", "Alaska")), class = "sf")
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================= | 34%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|============================== | 44%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================ | 64%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================= | 94%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
lower48$STATECOUNTYFP <- paste0(lower48$STATEFP, lower48$COUNTYFP)
Income <-read_csv("Data/IncomeData.csv")
Income <- Income %>%
mutate( STATECOUNTYFP = str_pad(`GEOID`, 5, "left", pad = "0"), Year = year)
Diabetes <- read_csv("Data/DiabetesData.csv")
Diabetes <- Diabetes %>%
mutate(STATECOUNTYFP = str_pad(`CountyFIPS`, 5, "left", pad = "0"), Percentage = as.numeric(Percentage))
countydiabetes <- merge(lower48, Diabetes, by = "STATECOUNTYFP", duplicateGeoms = TRUE)
countydiabetesincome <- merge(countydiabetes, Income, by = c("STATECOUNTYFP", "Year"), duplicateGeoms = TRUE) %>%
mutate(year = as.integer(year))
countydiabetesincome$adjustedrate <- countydiabetesincome$Percentage/countydiabetesincome$income_per_capita
This plot demonstrates a clear correlation between diabetes rates and income, with higher income counties having lower rates of diabetes. It also shows that while diabetes has increased everywhere over time, the increases have been far more pronounced for poorer counties. Note especially the sharp spike in rates over 2015-2016
library(scales)
diabetesplot <- ggplot(filter(countydiabetesincome, income_per_capita < 100000),
aes(x = income_per_capita, y = Percentage, color = region_name)) +
geom_jitter(alpha = .7) +
theme_economist() +
scale_color_brewer(palette = "Dark2") +
labs(x="\nIncome per capita",
title = "Diabetes rate vs. income\n",
caption = "Source: CDC") +
scale_y_continuous(labels = function(x) paste0(x, "%")) +
scale_x_continuous(labels = dollar) +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5),
legend.position = "top", legend.title = element_blank(),
axis.title.y = element_blank())
diabetesplotanim <- diabetesplot + transition_time(year) +
labs(title = "Diabetes rate vs. income: {frame_time}\n")
diabetesplotanim
The first map shows diabetes rates as a percentage of the population for each county in the US. The second maps an adjusted metric, the diabetes ratio/per-capita income ratio. This income-adjusted adjuested ratio is intended to account for variance due to income. Geographic differences in diabetes rates are more muted in the adjusted map, indicating that some of the differences in diabetes levels are actually the result of income clustering.
diabetes <- tm_shape(filter(countydiabetes, Year == "2016")) +
tm_fill(col = "Percentage", title = "", pal = "YlOrRd", n = 5) +
tm_layout(frame = FALSE, legend.format = list(
fun = function(x) paste0(formatC(x, digits = 0, format = "f"), "%")))
tmap_save(diabetes, "diabetes_counties_2016_map.png")
diabetes_adjusted <- tm_shape(filter(countydiabetesincome, Year == "2016")) +
tm_fill(col = "adjustedrate", title = "Diabetes-to-\nincome ratio", pal = "YlOrRd", n = 5) +
tm_layout(frame = FALSE, legend.format = list(
fun = function(x) paste0(formatC(x, digits = 4, format = "f"))))
tmap_save(diabetes_adjusted, "diabetes_income_ratio_counties_2016_map.png")
LifeExp <- read_csv("Data/ExpectancyData.csv")
LifeExp <- LifeExp %>%
mutate(clean_lifeexp = as.numeric(str_extract(`Life expectancy`, "^\\d*.\\d*")),
STATECOUNTYFP = str_pad(`FIPS`, 5, "left", pad = "0"))
countylife <- merge(lower48, LifeExp, by = "STATECOUNTYFP", duplicateGeoms = TRUE)
countylifeincome <- merge(countylife, Income, by = c("STATECOUNTYFP", "Year"), duplicateGeoms = TRUE) %>%
mutate(year = as.integer(year))
countylifeincome$adjustedrate <- countylifeincome$clean_lifeexp/countylifeincome$income_per_capita
The plot demonstrates a clear relatinoship between life expectancy and income, with higher income counties having higher average life expectancy. There is a worrying pattern of life expectancy increasing for higher-income counties while life expectancy is constant or falling in lower-income counties.
lifeplot <- ggplot(filter(countylifeincome, income_per_capita < 100000),
aes(x = income_per_capita, y = clean_lifeexp, color = region_name)) +
geom_jitter(alpha = .7) +
theme_economist() +
scale_color_brewer(palette = "Dark2") +
labs(x="\nIncome per capita",
title = "Life expectancy vs. income\n",
caption = "Source: IHME") +
scale_x_continuous(labels = dollar) +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5),
legend.position = "top", legend.title = element_blank(),
axis.title.y = element_blank())
lifeplotanim <- lifeplot + transition_time(year) +
labs(title = "Life expectancy vs. income: {frame_time}")
lifeplotanim
The first map shows life expectancy for each county in the US. The second maps an adjusted metric, life expectancy/income per capita. Geographic differences in life expectancy (intensity of color variation) are significantly more muted in the adjusted map, indicating that much of the variance in life expectancy is the result of income clustering.
life_expectancy <- tm_shape(filter(countylife, Year == "2014")) +
tm_fill(col = "clean_lifeexp", title = "", pal = "YlGn", n = 5) +
tm_layout(frame = FALSE, legend.format = list(
fun = function(x) paste0(formatC(x, digits = 0, format = "f"))))
tmap_save(life_expectancy, "life_expectancy_counties_2014_map.png")
life_expectancy_adjusted <- tm_shape(filter(countylifeincome, Year == "2014")) +
tm_fill(col = "adjustedrate", title = "Life expectancy-to-\nincome ratio", pal = "YlGn", n = 5) +
tm_layout(frame = FALSE, legend.format = list(
fun = function(x) paste0(formatC(x, digits = 3, format = "f"))))
tmap_save(life_expectancy_adjusted, "life_expectancy_adjusted_counties_2014_map.png")
library(readxl)
im_data <- read_excel("Data/Linked_Birth_Infant_Death_Records_per_1000_by_County_and_Year_Died_2007-2017.xlsx")
im_data <- im_data %>%
mutate(STATECOUNTYFP = str_pad(`County Code`, 5, "left", pad = "0"),
Deathrate = as.numeric(`Death Rate`))
countyinfant <- merge(lower48, im_data, by = "STATECOUNTYFP", duplicateGeoms = TRUE)
countyinfantincome <- merge(im_data, Income, by = "STATECOUNTYFP", duplicateGeoms = TRUE) %>%
mutate(year = as.integer(year))
countyinfantincome$adjustedrate <- countyinfantincome$Deathrate/countyinfantincome$income_per_capita
Infant mortality shows a clear correlation to income, with higher income counties having lower infant mortality. Neither the pattern nor the overall incidence of child mortality appears to be changing.
infantplot <- ggplot(filter(countyinfantincome, income_per_capita < 100000),
aes(x = income_per_capita, y = Deathrate, color = region_name)) +
geom_jitter(alpha = .7) +
theme_economist() +
scale_color_brewer(palette = "Dark2") +
labs(x="\nIncome per capita",
y = "Deaths per 1,000 live births\n",
title = "Infant mortality vs. income\n",
caption = "Source: CDC") +
scale_x_continuous(labels = dollar) +
theme(axis.line = element_line(colour = "black"),
plot.title = element_text(face = "bold.italic", hjust = 0.5),
legend.position = "top", legend.title = element_blank())
infantplotanim <- infantplot + transition_time(year) +
labs(title = "Infant mortality vs. income: {frame_time}")
infantplotanim